Setup

library(tidyverse)
library(lubridate)
library(leaflet)
incidents <- feather::read_feather("incidents.feather")
drivers <- feather::read_feather("drivers.feather")
non_motorists <- feather::read_feather("non_motorists.feather")
single_color <- "#3182bd"
basic_theme <- theme(
  plot.title = element_text(size = 24, vjust = .5, hjust = .5),
  axis.title = element_text(size = 16, hjust = .5, vjust = .5),
  panel.background = element_rect(fill = "white"),
  legend.position = "none",
  panel.grid.major.y = element_line(color = "grey75", size = .1, linetype = "solid")
)

Incidents

This is a quick overview of the structure of the Incidents table.

skimr::skim_to_wide(incidents) %>% DT::datatable()

There have been 143 fatal crashes out of 57162 recorded incidents since this data set began in 2015.

incidents %>%
  group_by(acrs_report_type) %>%
  count() %>%
  knitr::kable()
acrs_report_type n
Fatal Crash 143
Injury Crash 20454
Property Damage Crash 36565

Location of Incidents

This interactive map shows the locations of the incidents in the data set. The 106 incidents that do not fall within the limits of the county are colored red.

pal <- colorFactor(c("blue", "red"), domain = c(0, 1))
incidents %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(lng = ~longitude, lat = ~latitude, radius = 3, stroke = TRUE, weight = 2, opacity = 1, color = ~ pal(not_in_county))

Time variables

Crashes peak around 5:00 PM, which lines up with the evening rush hour. It seems that the report times are often rounded to the nearest fifteen or five minute round number.

incidents %>%
  select(incident_hour, incident_minute) %>%
  mutate(incident_time = 60 * incident_hour + incident_minute) %>%
  ggplot() +
  stat_count(aes(x = incident_time), fill = single_color) +
  scale_x_continuous(
    name = "Time",
    breaks = seq.int(from = 0, to = 24 * 60, by = 120),
    labels = c(paste0(seq.int(from = 0, to = 22, by = 2), ":00"), "0:00")
  ) +
  ggtitle("Crashes by Time of Day") +
  basic_theme

Crashes are slightly less frequent on the weekends than on weekdays.

incidents %>%
  select(incident_weekday) %>%
  ggplot() +
  stat_count(aes(x = incident_weekday), fill = single_color) +
  scale_x_continuous(
    name = "Day of Week",
    breaks = 1:7,
    labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thurday", "Friday", "Saturday")
  ) +
  ggtitle("Crashes by Day of Week") +
  basic_theme

The number of crashes seems to be fairly constant over time.

incidents %>%
  select(incident_year) %>%
  ggplot() +
  stat_count(aes(x = incident_year), fill = single_color) +
  ggtitle("Crashes by Year") +
  basic_theme

This table contains the number of crashes recorded on each date. 2019 is excluded because the year hasn’t concluded and the counts for dates that already have happened would be inflated. The dates with the most crashes tend to come from the last four months of the year, but holidays including Christmas and New Years are among the days with the fewest crashes.

incidents %>%
  filter(incident_year < 2019) %>%
  select(incident_month, incident_day) %>%
  mutate(incident_date = paste0(incident_month, "/", incident_day)) %>%
  count(incident_date) %>%
  arrange(desc(n)) %>%
  rename(Date = incident_date, `Number of Crashes` = n) %>%
  DT::datatable()

Weather

Most crashes come under clear weather conditions.

incidents %>%
  group_by(weather) %>%
  count() %>%
  arrange(desc(n)) %>%
  knitr::kable()
weather n
CLEAR 37602
RAINING 7005
CLOUDY 5972
N/A 4814
SNOW 676
UNKNOWN 352
FOGGY 228
WINTRY MIX 171
OTHER 140
SLEET 77
SEVERE WINDS 64
BLOWING SNOW 54
BLOWING SAND SOIL DIRT 7

Substance Abuse

Crashes involving alcohol and illegal drugs constitute a small minority of the total incidents.

incidents %>%
  mutate(alcohol = driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  count(alcohol) %>%
  knitr::kable()
alcohol n
FALSE 53944
TRUE 3218
incidents %>%
  count(driver_substance_abuse_alcohol_contributed) %>%
  knitr::kable()
driver_substance_abuse_alcohol_contributed n
0 56292
1 870
incidents %>%
  count(driver_substance_abuse_alcohol_present) %>%
  knitr::kable()
driver_substance_abuse_alcohol_present n
0 54811
1 2351
incidents %>%
  mutate(illegal_drug = driver_substance_abuse_illegal_drug_present | driver_substance_abuse_illegal_drug_contributed) %>%
  count(illegal_drug) %>%
  knitr::kable()
illegal_drug n
FALSE 56943
TRUE 219
incidents %>%
  count(driver_substance_abuse_illegal_drug_contributed) %>%
  knitr::kable()
driver_substance_abuse_illegal_drug_contributed n
0 57116
1 46
incidents %>%
  count(driver_substance_abuse_illegal_drug_present) %>%
  knitr::kable()
driver_substance_abuse_illegal_drug_present n
0 56989
1 173

Alcohol and Time

Crashes involving alcohol do not follow the general time of day trends of crashes as a whole. The late night and early morning hours see more crashes than the daylight hours.

incidents %>%
  filter(driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  select(incident_hour, incident_minute) %>%
  mutate(incident_time = 60 * incident_hour + incident_minute) %>%
  ggplot() +
  stat_count(aes(x = incident_time), fill = single_color) +
  scale_x_continuous(
    name = "Time",
    breaks = seq.int(from = 0, to = 24 * 60, by = 120),
    labels = paste0(seq.int(from = 0, to = 24, by = 2), ":00")
  ) +
  ggtitle("Crashes Involving Alcohol by Time of Day") +
  basic_theme

Weekends see more alchol related crashes than weekdays, the opposite of the trend for all crashes.

incidents %>%
  filter(driver_substance_abuse_alcohol_present | driver_substance_abuse_alcohol_contributed) %>%
  select(incident_weekday) %>%
  ggplot() +
  stat_count(aes(x = incident_weekday), fill = single_color) +
  scale_x_continuous(
    name = "Day of Week",
    breaks = 1:7,
    labels = c("Sunday", "Monday", "Tuesday", "Wednesday", "Thurday", "Friday", "Saturday")
  ) +
  ggtitle("Crashes Involving Alcohol by Day of Week") +
  basic_theme

Drivers

Overview of the drivers table structure.

skimr::skim_to_wide(drivers) %>% DT::datatable()

Driver Injuries

81.31% of drivers did not sustain any reported injuries. 0.91% of Drivers suffered serious or fatal injuries.

ordered_levels <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
drivers %>%
  mutate(`Injury Severity` = factor(injury_severity, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`), fill = single_color) +
  ggtitle("Severity of Driver Injuries") +
  basic_theme

drivers %>%
  count(injury_severity) %>%
  arrange(n) %>%
  knitr::kable()
injury_severity n
FATAL INJURY 70
SUSPECTED SERIOUS INJURY 867
SUSPECTED MINOR INJURY 7658
POSSIBLE INJURY 10452
NO APPARENT INJURY 82901

Vehichle Damage

Most reported accidents involved at least some vehicle damage. Just 3.73% of drivers reported no vehicle damage.

ordered_levels <- drivers %>%
  count(vehicle_damage_extent) %>%
  arrange(desc(n)) %>%
  pull(vehicle_damage_extent)
drivers %>%
  mutate(`Vehicle Damage` = factor(vehicle_damage_extent, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Vehicle Damage`), fill = single_color) +
  ggtitle("Severity of Vehicle Damage") +
  basic_theme

drivers %>%
  count(vehicle_damage_extent) %>%
  arrange(desc(n)) %>%
  knitr::kable()
vehicle_damage_extent n
DISABLING 36101
FUNCTIONAL 27579
SUPERFICIAL 26731
DESTROYED 4002
NO DAMAGE 3812
UNKNOWN 3480
N/A 185
OTHER 58

Speed Limits

drivers %>%
  ggplot() +
  stat_count(aes(x = speed_limit, fill = ), fill = single_color) +
  ggtitle("Speed Limits") +
  basic_theme

ordered_injury <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
drivers %>%
  mutate(
    `Speed Limit` = factor(c("LOW", "MEDIUM", "HIGH")[1 + findInterval(speed_limit, c(31, 46))], levels = c("LOW", "MEDIUM", "HIGH")),
    `Injury Severity` = factor(injury_severity, ordered_injury)
  ) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`, fill = `Speed Limit`)) +
  basic_theme +
  theme(legend.position = c(.9, .9)) +
  ggtitle("Injuries and Speed Limits")

ordered_injury <- drivers %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
drivers %>%
  mutate(
    speed_limit = factor(c("LOW", "MEDIUM", "HIGH")[1 + findInterval(speed_limit, c(31, 46))], levels = c("LOW", "MEDIUM", "HIGH")),
    injury_severity = factor(injury_severity, ordered_injury)
  ) %>%
  group_by(speed_limit, injury_severity) %>%
  tally() %>%
  rename(`Speed Limit` = speed_limit, `Injury Severity` = injury_severity) %>%
  ggplot() +
  geom_bar(aes(x = `Injury Severity`, weight = n, fill = `Speed Limit`), position = "fill") +
  basic_theme +
  theme(legend.position = "top") +
  ggtitle("Injuries and Speed Limits") +
  ylab("Proportion")

Types of Vehicles

ordered_levels <- drivers %>%
  count(vehicle_body_type) %>%
  arrange(desc(n)) %>%
  pull(vehicle_body_type)
drivers %>%
  mutate(`Body Type` = factor(as.character(fct_collapse(vehicle_body_type, OTHER = ordered_levels[-(1:5)])), c(ordered_levels[1:5], "OTHER"))) %>%
  ggplot() +
  stat_count(aes(x = `Body Type`), fill = single_color) +
  ggtitle("Vehicle Body Types") +
  basic_theme

Non-Motorists

Overview of Non-Motorists table structure

skimr::skim_to_wide(non_motorists) %>% DT::datatable()

Types of Non Motorists

Pedestrians are the most common type of non motorist, followed by bicyclists.

ordered_levels <- non_motorists %>%
  count(pedestrian_type) %>%
  arrange(desc(n)) %>%
  pull(pedestrian_type)
non_motorists %>%
  mutate(pedestrian_type = factor(pedestrian_type, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = pedestrian_type), fill = single_color) +
  basic_theme +
  ggtitle("Types of Non-Motorists")

Locations of Incidents Involving Non-Motorists

pal <- colorFactor(c("blue", "red"), domain = c(0, 1))
non_motorists %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(lng = ~longitude, lat = ~latitude, radius = 3, stroke = TRUE, weight = 2, opacity = 1, color = ~ pal(not_in_county))

Injuries

8.62% of non-motorists did not sustain any reported injuries. 13.05% of non-motorists suffered serious or fatal injuries.

ordered_levels <- non_motorists %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
non_motorists %>%
  mutate(`Injury Severity` = factor(injury_severity, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`), fill = single_color) +
  ggtitle("Severity of Non Motorist Injuries") +
  basic_theme

non_motorists %>%
  count(injury_severity) %>%
  arrange(n) %>%
  knitr::kable()
injury_severity n
FATAL INJURY 63
NO APPARENT INJURY 279
SUSPECTED SERIOUS INJURY 359
POSSIBLE INJURY 1072
SUSPECTED MINOR INJURY 1460

Bicycle Helmet Usage

ordered_levels <- non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  count(safety_equipment) %>%
  arrange(desc(n)) %>%
  pull(safety_equipment)
non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  mutate(`Safety Equipment` = factor(safety_equipment, ordered_levels)) %>%
  ggplot() +
  stat_count(aes(x = `Safety Equipment`), fill = single_color) +
  basic_theme +
  ggtitle("What Kinds of Safety Equiptment Do Bicyclists Use?")

The more severe injuries tend to have lower rates of helmet usage than minor injuries.

ordered_safety <- non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  count(safety_equipment) %>%
  arrange(desc(n)) %>%
  pull(safety_equipment)
ordered_injury <- non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  count(injury_severity) %>%
  arrange(desc(n)) %>%
  pull(injury_severity)
non_motorists %>%
  filter(pedestrian_type == "BICYCLIST") %>%
  mutate(
    `Safety Equipment` = fct_collapse(safety_equipment, OTHER = ordered_safety[-(1:2)]),
    `Injury Severity` = factor(injury_severity, ordered_injury)
  ) %>%
  ggplot() +
  stat_count(aes(x = `Injury Severity`, fill = `Safety Equipment`)) +
  basic_theme +
  theme(legend.position = c(.9, .9)) +
  ggtitle("Injuries and Helmets")

Consistency Check

fatal_drivers <- drivers %>%
  filter(injury_severity_fatal_injury == 1) %>%
  pull(report_number)
fatal_nonm <- non_motorists %>%
  filter(injury_severity_fatal_injury == 1) %>%
  pull(report_number)
fatal_acrs <- incidents %>%
  filter(acrs_report_type_fatal_crash == 1) %>%
  pull(report_number)
recorded_fatal <- c(fatal_drivers, fatal_nonm)
# vector of fatal reports with no recorded fatality
probs <- fatal_acrs %>% .[!. %in% recorded_fatal]
incidents %>%
  filter(report_number %in% probs) %>%
  DT::datatable(options = list(pageLength = 5))
drivers %>%
  filter(report_number %in% probs) %>%
  DT::datatable(options = list(pageLength = 5))
non_motorists %>%
  filter(report_number %in% probs) %>%
  DT::datatable(options = list(pageLength = 5))
drivers %>%
  group_by(report_number) %>%
  tally() %>%
  arrange(desc(n))
## # A tibble: 56,779 x 2
##    report_number     n
##    <chr>         <int>
##  1 MCP12130045       9
##  2 MCP2667000H       8
##  3 MCP1227000M       7
##  4 MCP15800085       7
##  5 MCP23580027       7
##  6 MCP2513001C       7
##  7 MCP2617006L       7
##  8 MCP3140000Y       7
##  9 MCP9130001S       7
## 10 MCP9422000Z       7
## # ... with 56,769 more rows
drivers %>%
  count(report_number) %>%
  count(n) %>%
  rename(`Vehicles Involved` = n) %>%
  knitr::kable()
Vehicles Involved nn
1 17614
2 34187
3 4133
4 704
5 114
6 17
7 8
8 1
9 1
non_motorists %>%
  group_by(report_number) %>%
  tally() %>%
  arrange(desc(n))
## # A tibble: 3,073 x 2
##    report_number     n
##    <chr>         <int>
##  1 MCP229800LT       4
##  2 MCP2546002K       4
##  3 DD5603004V        3
##  4 DM8445001H        3
##  5 DM8457000R        3
##  6 DM8463000L        3
##  7 EJ7809000W        3
##  8 EJ7833003W        3
##  9 MCP1438002S       3
## 10 MCP20080044       3
## # ... with 3,063 more rows
non_motorists %>%
  count(report_number) %>%
  count(n) %>%
  rename(`non_motorists Involved` = n) %>%
  knitr::kable()
non_motorists Involved nn
1 2938
2 112
3 21
4 2